Clustering


Histogram of degrees

Trajectory analysis
#for each ID, find the minimum round (1 to 10) for which degree is 0
sample.i = clust.res[clust.res$degree<=2 & clust.res$round!=0,]
sample.i$round = as.numeric(sample.i$round) - 1
isolationRound <- aggregate(round ~ ID, data = sample.i, FUN = min, na.rm = TRUE)
colnames(isolationRound)[c(1:2)] = c("ID","roundZero")
#subtract the minimum round for which degree is 0 to obtain rounds to isolation
sample.traj = merge(clust.res[clust.res$round!=0,], isolationRound, by="ID", all.x=TRUE)
sample.traj$round = as.numeric(sample.traj$round) - 1
sample.traj$roundToIsolation = sample.traj$round - sample.traj$roundZero
#remove all "positive" rounds
sample.traj = subset(sample.traj, sample.traj$roundToIsolation<=0)
#trajectories
sum_traj <- sample.traj %>%
group_by(roundToIsolation) %>%
summarise(mean = mean(degree, na.rm = TRUE),
ci = 1.96 * sd(degree, na.rm = TRUE)/sqrt(n()))
sum_traj %>%
ggplot(aes(x = roundToIsolation, y = mean)) +
geom_line(aes()) +
geom_errorbar(aes(ymin = mean - ci, ymax = mean + ci),
width = .1) +
theme_bw() +
geom_point(size = 4) +
labs(
title = "Trajectories of degrees",
x = "Round to isolation",
y = "Degree",
color = NULL,
linetype = NULL) +
theme(plot.title = element_text(hjust = 0.5))

Comparison of degree trajectory and other variables
#Plot the degree trajectory (rounds to isolation) and another variable on the same graph
sample.cc = sample.traj
sum_traj <- sample.cc %>%
group_by(roundToIsolation) %>%
summarise(mean.degree = mean(degree),
ci.degree = 1.96 * sd(degree)/sqrt(n()))
sum_coef <- sample.cc %>%
group_by(roundToIsolation) %>%
summarise(mean = mean(coefLocal, na.rm=TRUE),
ci = 1.96 * sd(coefLocal, na.rm=TRUE)/sqrt(n()))
sum_alterPrevWealth <- sample.cc %>%
group_by(roundToIsolation) %>%
summarise(mean = mean(alterPrevWealth, na.rm=TRUE),
ci = 1.96 * sd(alterPrevWealth, na.rm=TRUE)/sqrt(n()))
sum_egoPrevWealth <- sample.cc %>%
group_by(roundToIsolation) %>%
summarise(mean = mean(egoPrevWealth, na.rm=TRUE),
ci = 1.96 * sd(egoPrevWealth, na.rm=TRUE)/sqrt(n()))
sum_wealthDiff <- sample.cc %>%
group_by(roundToIsolation) %>%
summarise(mean = mean(wealthDiff, na.rm=TRUE),
ci = 1.96 * sd(wealthDiff, na.rm=TRUE)/sqrt(n()))
sum_alterBehavior <- sample.cc %>%
group_by(roundToIsolation) %>%
summarise(mean = mean(alterBehavior, na.rm=TRUE),
ci = 1.96 * sd(alterBehavior, na.rm=TRUE)/sqrt(n()))
sample.cc <- sample.cc %>%
mutate(behavior =
case_when(
behavior=="C" ~ 1,
behavior=="D" ~ 0
))
sum_behavior <- sample.cc %>%
group_by(roundToIsolation) %>%
summarise(mean = mean(behavior, na.rm=TRUE),
ci = 1.96 * sd(behavior, na.rm=TRUE)/sqrt(n()))
cc.compare = merge(data.frame(sum_traj),data.frame(sum_wealthDiff),by="roundToIsolation")
scale=100
g.compare = ggplot(data=cc.compare, aes(x=roundToIsolation,y=mean.degree)) +
geom_line(aes(color="Degree")) +
geom_errorbar(aes(x = roundToIsolation, ymin = mean.degree - ci.degree, ymax = mean.degree + ci.degree, color="Degree"),width = .1) +
geom_point(aes(x = roundToIsolation, y=mean.degree, color="Degree"),size = 4) +
geom_line(aes(y = mean/scale, color="Average wealth difference in previous round")) +
geom_errorbar(aes(x = roundToIsolation, ymin = mean/scale - ci/scale, ymax = mean/scale + ci/scale, color="Average wealth difference in previous round"),width = .1) +
geom_point(aes(x = roundToIsolation, y=mean/scale, color="Average wealth difference in previous round"),size = 4) +
scale_y_continuous(
# Features of the first axis
name = "Degree",
# Add a second axis and specify its features
sec.axis = sec_axis(~.*scale, name="Average wealth difference in previous round")) +
labs(
title = "Trajectories of degrees and wealth difference",
x = "Round to isolation",
y = "Degree",
color = "") +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5)) +
scale_color_manual(values = c("orange2", "gray30"))
g.compare

Does inequality cause isolation?
###does initial behavior predict whether someone will be ever isolated?
sample.reg = clust
sample.reg$isolated = ifelse(sample.reg$degree.1<=2 | sample.reg$degree.2<=2 | sample.reg$degree.3<=2 | sample.reg$degree.4<=2 | sample.reg$degree.5<=2|
sample.reg$degree.6<=2 | sample.reg$degree.7<=2 | sample.reg$degree.8<=2 | sample.reg$degree.9<=2 | sample.reg$degree.10<=2,
1,0)
sample.reg$inequality = relevel(factor(sample.reg$inequality),ref="none")
#inequality
reg.everIsolated.inequality = miceadds::glm.cluster(isolated ~ inequality*visibleWealth,
data = sample.reg,
cluster = "gameID",
family = binomial(link = "logit"))
alpha <- 0.05
x.everIsolated.inequality <- summary(reg.everIsolated.inequality)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.9831535 0.4345736 -6.8645527 6.669990e-12
## inequalityhigh 1.1763561 0.5391228 2.1819816 2.911089e-02
## inequalitylow 0.5718801 0.4965976 1.1515965 2.494869e-01
## visibleWealth 0.6049310 0.5216184 1.1597193 2.461631e-01
## inequalityhigh:visibleWealth -0.8844956 0.6346571 -1.3936590 1.634206e-01
## inequalitylow:visibleWealth -0.3721900 0.6069544 -0.6132092 5.397380e-01
y.everIsolated.inequality <- confint(reg.everIsolated.inequality, level=1-alpha)
coef.everIsolated.inequality <- x.everIsolated.inequality[,1]
LowerCL.everIsolated.inequality <- y.everIsolated.inequality[,1]
UpperCL.everIsolated.inequality <- y.everIsolated.inequality[,2]
p.everIsolated.inequality <-x.everIsolated.inequality[,4]
#odds ratio
z.everIsolated.inequality <- rbind(
cbind(exp(coef.everIsolated.inequality["inequalityhigh"]), exp(LowerCL.everIsolated.inequality["inequalityhigh"]), exp(UpperCL.everIsolated.inequality["inequalityhigh"]), p.everIsolated.inequality["inequalityhigh"]),
cbind(exp(coef.everIsolated.inequality["inequalitylow"]), exp(LowerCL.everIsolated.inequality["inequalitylow"]), exp(UpperCL.everIsolated.inequality["inequalitylow"]), p.everIsolated.inequality["inequalitylow"]),
cbind(exp(coef.everIsolated.inequality["visibleWealth"]), exp(LowerCL.everIsolated.inequality["visibleWealth"]), exp(UpperCL.everIsolated.inequality["visibleWealth"]), p.everIsolated.inequality["visibleWealth"]),
cbind(exp(coef.everIsolated.inequality["inequalityhigh:visibleWealth"]), exp(LowerCL.everIsolated.inequality["inequalityhigh:visibleWealth"]), exp(UpperCL.everIsolated.inequality["inequalityhigh:visibleWealth"]), p.everIsolated.inequality["inequalityhigh:visibleWealth"]),
cbind(exp(coef.everIsolated.inequality["inequalitylow:visibleWealth"]), exp(LowerCL.everIsolated.inequality["inequalitylow:visibleWealth"]), exp(UpperCL.everIsolated.inequality["inequalitylow:visibleWealth"]), p.everIsolated.inequality["inequalitylow:visibleWealth"])
)
z.everIsolated.inequality = data.frame(z.everIsolated.inequality)
names(z.everIsolated.inequality) = c("OR", "Lower CL", "Upper CL", "P-value")
z.everIsolated.inequality
## OR Lower CL Upper CL P-value
## inequalityhigh 3.2425373 1.1271528 9.327971 0.02911089
## inequalitylow 1.7715947 0.6693599 4.688879 0.24948690
## visibleWealth 1.8311258 0.6587426 5.090033 0.24616310
## inequalityhigh:visibleWealth 0.4129224 0.1190275 1.432483 0.16342059
## inequalitylow:visibleWealth 0.6892233 0.2097584 2.264647 0.53973801
#gini
reg.everIsolated.gini = miceadds::glm.cluster(isolated ~ gini*visibleWealth,
data = sample.reg,
cluster = "gameID",
family = binomial(link = "logit"))
alpha <- 0.05
x.everIsolated.gini <- summary(reg.everIsolated.gini)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.9955952 0.3494693 -8.571841 1.018438e-17
## gini 2.9595094 1.3271235 2.230018 2.574624e-02
## visibleWealth 0.6487687 0.4219287 1.537627 1.241400e-01
## gini:visibleWealth -2.2453421 1.5495229 -1.449054 1.473226e-01
y.everIsolated.gini <- confint(reg.everIsolated.gini, level=1-alpha)
coef.everIsolated.gini <- x.everIsolated.gini[,1]
LowerCL.everIsolated.gini <- y.everIsolated.gini[,1]
UpperCL.everIsolated.gini <- y.everIsolated.gini[,2]
p.everIsolated.gini <-x.everIsolated.gini[,4]
#odds ratio
z.everIsolated.gini <- rbind(
cbind(exp(coef.everIsolated.gini["gini"]), exp(LowerCL.everIsolated.gini["gini"]), exp(UpperCL.everIsolated.gini["gini"]), p.everIsolated.gini["gini"]),
cbind(exp(coef.everIsolated.gini["visibleWealth"]), exp(LowerCL.everIsolated.gini["visibleWealth"]), exp(UpperCL.everIsolated.gini["visibleWealth"]), p.everIsolated.gini["visibleWealth"]),
cbind(exp(coef.everIsolated.gini["gini:visibleWealth"]), exp(LowerCL.everIsolated.gini["gini:visibleWealth"]), exp(UpperCL.everIsolated.gini["gini:visibleWealth"]), p.everIsolated.gini["gini:visibleWealth"])
)
z.everIsolated.gini = data.frame(z.everIsolated.gini)
names(z.everIsolated.gini) = c("OR", "Lower CL", "Upper CL", "P-value")
z.everIsolated.gini
## OR Lower CL Upper CL P-value
## gini 19.2885062 1.431030906 259.984932 0.02574624
## visibleWealth 1.9131837 0.836778160 4.374244 0.12413996
## gini:visibleWealth 0.1058913 0.005080471 2.207073 0.14732257
#initScore
reg.everIsolated.initScore = miceadds::glm.cluster(isolated ~ initScore*visibleWealth,
data = sample.reg[sample.reg$inequality=="high",],
cluster = "gameID",
family = binomial(link = "logit"))
alpha <- 0.05
x.everIsolated.initScore <- summary(reg.everIsolated.initScore)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.8753386384 0.2981468924 -6.2899822 3.175024e-10
## initScore 0.0001287980 0.0005171585 0.2490494 8.033226e-01
## visibleWealth 0.0482782643 0.4680608749 0.1031453 9.178477e-01
## initScore:visibleWealth -0.0007704406 0.0010142138 -0.7596432 4.474679e-01
y.everIsolated.initScore <- confint(reg.everIsolated.initScore, level=1-alpha)
coef.everIsolated.initScore <- x.everIsolated.initScore[,1]
LowerCL.everIsolated.initScore <- y.everIsolated.initScore[,1]
UpperCL.everIsolated.initScore <- y.everIsolated.initScore[,2]
p.everIsolated.initScore <-x.everIsolated.initScore[,4]
#odds ratio
z.everIsolated.initScore <- rbind(
cbind(exp(coef.everIsolated.initScore["initScore"]), exp(LowerCL.everIsolated.initScore["initScore"]), exp(UpperCL.everIsolated.initScore["initScore"]), p.everIsolated.initScore["initScore"]),
cbind(exp(coef.everIsolated.initScore["visibleWealth"]), exp(LowerCL.everIsolated.initScore["visibleWealth"]), exp(UpperCL.everIsolated.initScore["visibleWealth"]), p.everIsolated.initScore["visibleWealth"]),
cbind(exp(coef.everIsolated.initScore["initScore:visibleWealth"]), exp(LowerCL.everIsolated.initScore["initScore:visibleWealth"]), exp(UpperCL.everIsolated.initScore["initScore:visibleWealth"]), p.everIsolated.initScore["initScore:visibleWealth"])
)
z.everIsolated.initScore = data.frame(z.everIsolated.initScore)
names(z.everIsolated.initScore) = c("OR", "Lower CL", "Upper CL", "P-value")
z.everIsolated.initScore
## OR Lower CL Upper CL P-value
## initScore 1.0001288 0.9991156 1.001143 0.8033226
## visibleWealth 1.0494626 0.4193270 2.626522 0.9178477
## initScore:visibleWealth 0.9992299 0.9972455 1.001218 0.4474679
Links broken by ego, by wealth difference and wealth visibility
How generous are people when alter defects, depending on wealth difference and visibility?
ldata = ldata %>% mutate(decile=ntile(wealthDiff,10))
sum_linkBroken <- ldata %>%
group_by(decile, visibleWealth) %>%
summarise(linkBroken = prop.table(table(connecting))["breakLink"])
sum_linkBroken = data.frame(sum_linkBroken)
g.linkBroken <- ggplot(data=sum_linkBroken,aes(x=decile,y=linkBroken,color = factor(visibleWealth)))+
geom_point() +
ggtitle("Proportions of links broken by ego, by wealth difference and wealth visibility") +
xlab("Wealth difference decile (alter - ego)") +
ylab("Proportions of links broken") +
scale_color_discrete(name="Visibility") +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5))
g.linkBroken

random forest model: wealth difference
ldata.rf = ldata[c("connecting","wealthDiff","visibleWealth","egoWealth","ego_behavior","alter_behavior","round")]
ldata.rf = ldata.rf[complete.cases(ldata.rf),]
ldata.rf$connecting = factor(ldata.rf$connecting)
ldata.rf$round = factor(ldata.rf$round)
ldata.rf = ldata.rf %>% mutate(round = factor(round, labels = make.names(levels(round))))
ldata.rf$ego_behavior=as.factor(ifelse(ldata.rf$ego_behavior==1,"C","D"))
ldata.rf$alter_behavior=as.factor(ifelse(ldata.rf$alter_behavior==1,"C","D"))
ldata.rf$visibleWealth=as.factor(ldata.rf$visibleWealth)
set.seed(10)
control.rf <- trainControl(method='repeatedcv',
number=10,
repeats=3,
search = 'random',
classProbs=TRUE,
summaryFunction = multiClassSummary,
allowParallel = TRUE)
model.weights <- ifelse(ldata.rf$connecting == "breakLink",
(1/table(ldata.rf$connecting)[1]) * 0.5,
(1/table(ldata.rf$connecting)[2]) * 0.5)
rf <- caret::train(connecting ~ wealthDiff + visibleWealth + egoWealth + ego_behavior + alter_behavior + round,
data = ldata.rf,
method = 'rf',
metric = 'AUC',
tuneLength = 30,
verbose=TRUE,
weights=model.weights,
trControl = control.rf)
print(rf)
## Random Forest
##
## 8450 samples
## 6 predictor
## 2 classes: 'breakLink', 'notBreakLink'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 3 times)
## Summary of sample sizes: 7605, 7604, 7605, 7604, 7605, 7605, ...
## Resampling results across tuning parameters:
##
## mtry logLoss AUC prAUC Accuracy Kappa F1 Sensitivity Specificity Pos_Pred_Value Neg_Pred_Value Precision Recall Detection_Rate
## 1 0.9631347 0.8826105 0.5082646 0.7836688 0.0000000 NaN 0.0000000 1.0000000 NaN 0.7836688 NaN 0.0000000 0.0000000
## 2 0.6282555 0.8887200 0.6147786 0.8579106 0.5321705 0.6161979 0.5293591 0.9486049 0.7405398 0.8796817 0.7405398 0.5293591 0.1145183
## 3 0.5289096 0.8892341 0.6673758 0.8630386 0.5709312 0.6554806 0.6032206 0.9347614 0.7195897 0.8952107 0.7195897 0.6032206 0.1304950
## 5 0.5062031 0.8861534 0.6799610 0.8595669 0.5705385 0.6585521 0.6263766 0.9239390 0.6957354 0.8996513 0.6957354 0.6263766 0.1355041
## 6 0.5217612 0.8837314 0.6747018 0.8565697 0.5651794 0.6555052 0.6313027 0.9187547 0.6828564 0.9003427 0.6828564 0.6313027 0.1365696
## 7 0.5172092 0.8811596 0.6680525 0.8530595 0.5560289 0.6487194 0.6276577 0.9152819 0.6724423 0.8991108 0.6724423 0.6276577 0.1357806
## 8 0.5200527 0.8792128 0.6605318 0.8499428 0.5483137 0.6431316 0.6256530 0.9118584 0.6629423 0.8982954 0.6629423 0.6256530 0.1353470
## 10 0.5571118 0.8743746 0.6528501 0.8447350 0.5340662 0.6323300 0.6176285 0.9074282 0.6488478 0.8958708 0.6488478 0.6176285 0.1336110
## 11 0.5619592 0.8731077 0.6478467 0.8439072 0.5307848 0.6294931 0.6134370 0.9075292 0.6475558 0.8948617 0.6475558 0.6134370 0.1327038
## 12 0.5621255 0.8720096 0.6422759 0.8415402 0.5244085 0.6246832 0.6099712 0.9054652 0.6412885 0.8937994 0.6412885 0.6099712 0.1319542
## 13 0.5709888 0.8713760 0.6399173 0.8415797 0.5244420 0.6246802 0.6101573 0.9054653 0.6411095 0.8938643 0.6411095 0.6101573 0.1319935
## 14 0.5746133 0.8706794 0.6360188 0.8411455 0.5247496 0.6254326 0.6134330 0.9040052 0.6389073 0.8944841 0.6389073 0.6134330 0.1327036
## Balanced_Accuracy
## 0.5000000
## 0.7389820
## 0.7689910
## 0.7751578
## 0.7750287
## 0.7714698
## 0.7687557
## 0.7625283
## 0.7604831
## 0.7577182
## 0.7578113
## 0.7587191
##
## AUC was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 3.
varImpPlot(rf$finalModel, type=2)

Partial dependence plot: do wealth differrence and wealth visiblity explain isolation?
pred.prob <- function(object, newdata) {
pred <- predict(object, newdata, type="prob")
prob.breakLink <- pred[, "breakLink"]
mean(prob.breakLink)
}
pdp = partial(rf, pred.var = c("wealthDiff","visibleWealth","alter_behavior","egoWealth"), pred.fun = pred.prob, plot = FALSE)
pdp.df = data.frame(
wealthDiff = pdp$wealthDiff,
alter_behavior = pdp$alter_behavior,
visibleWealth = pdp$visibleWealth,
egoWealth = pdp$egoWealth,
breakLink = pdp$yhat
)
# g.pdp <- ggplot(data=pdp.df,aes(x=wealthDiff,y=breakLink,color = factor(visibleWealth),shape=factor(alter_behavior)))+
# geom_point() +
# geom_line() +
# ggtitle("Proportions of links broken by ego, by wealth difference and wealth visibility") +
# xlab("Wealth difference (alter - ego)") +
# ylab("Proportions of links broken") +
# scale_color_discrete(name="Wealth visibility") +
# scale_shape(name="Behavior of alter") +
# theme_classic() +
# theme(plot.title = element_text(hjust = 0.5))
#
# g.pdp
g.visible = ggplot(data = pdp.df[pdp.df$visibleWealth=="Visible" & pdp.df$alter_behavior=="D",],
aes(x = wealthDiff, y = egoWealth, fill = breakLink)) +
geom_tile() +
ggtitle("Proportions of links broken by ego, \nby wealth difference and ego's wealth, \nwhen wealth is visible, \ngiven that alter defected") +
scale_x_continuous("Wealth difference (alter - ego)") +
scale_y_continuous("Wealth of ego") +
scale_fill_continuous(type = "viridis", limits=c(0.3,0.8)) +
labs(fill="Proportion of links \nbroken by ego")
g.invisible = ggplot(data = pdp.df[pdp.df$visibleWealth=="Not visible" & pdp.df$alter_behavior=="D",],
aes(x = wealthDiff, y = egoWealth, fill = breakLink)) +
geom_tile() +
ggtitle("Proportions of links broken by ego, \nby wealth difference and ego's wealth, \nwhen wealth is not visible, \ngiven that alter defected") +
scale_x_continuous("Wealth difference (alter - ego)") +
scale_y_continuous("Wealth of ego") +
scale_fill_continuous(type = "viridis", limits=c(0.3,0.8)) +
labs(fill="Proportion of links \nbroken by ego")
ggarrange(g.visible,g.invisible)

#rich and middle-class people become more generous when wealth is visible
Regression: greater wealth difference and visible wealth
ldata.reg = ldata[c("connecting","wealthDiff","visibleWealth","egoWealth","alter_behavior","round","gameID")]
ldata.reg = ldata.reg[complete.cases(ldata.reg),]
ldata.reg$connecting = ifelse(ldata.reg$connecting=="breakLink",1,0)
reg.link = miceadds::glm.cluster(connecting ~ wealthDiff*visibleWealth + egoWealth + alter_behavior + factor(round),
data = ldata.reg,
cluster = "gameID",
family = binomial(link = "logit"))
summary(reg.link)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 7.155796e-01 1.985413e-01 3.6041855 3.131334e-04
## wealthDiff 3.425646e-04 7.184800e-05 4.7679076 1.861492e-06
## visibleWealthVisible -1.562342e-01 1.196633e-01 -1.3056157 1.916833e-01
## egoWealth -5.539615e-05 6.585905e-05 -0.8411319 4.002740e-01
## alter_behavior -3.446595e+00 1.676009e-01 -20.5642954 5.732649e-94
## factor(round)1 -1.787068e-01 1.985790e-01 -0.8999281 3.681585e-01
## factor(round)2 -3.115523e-01 1.886328e-01 -1.6516333 9.860932e-02
## factor(round)3 -4.711658e-01 1.845068e-01 -2.5536509 1.066001e-02
## factor(round)4 -2.725019e-01 1.794467e-01 -1.5185670 1.288715e-01
## factor(round)5 -3.606493e-01 1.849159e-01 -1.9503420 5.113537e-02
## factor(round)6 -4.337742e-01 2.143599e-01 -2.0235788 4.301351e-02
## factor(round)7 -2.124201e-01 1.945856e-01 -1.0916538 2.749853e-01
## factor(round)8 -6.917863e-01 2.111869e-01 -3.2757071 1.053978e-03
## factor(round)9 -4.687630e-01 2.372295e-01 -1.9759895 4.815597e-02
## wealthDiff:visibleWealthVisible 2.047481e-05 1.062155e-04 0.1927667 8.471417e-01
random forest model: ego wealth and alter wealth
ldata.rf = ldata[c("connecting","visibleWealth","egoWealth","alterWealth","ego_behavior","alter_behavior","round")]
ldata.rf = ldata.rf[complete.cases(ldata.rf),]
ldata.rf$connecting = factor(ldata.rf$connecting)
ldata.rf$round = factor(ldata.rf$round)
ldata.rf = ldata.rf %>% mutate(round = factor(round, labels = make.names(levels(round))))
ldata.rf$ego_behavior=as.factor(ifelse(ldata.rf$ego_behavior==1,"C","D"))
ldata.rf$alter_behavior=as.factor(ifelse(ldata.rf$alter_behavior==1,"C","D"))
ldata.rf$visibleWealth=as.factor(ldata.rf$visibleWealth)
set.seed(10)
control.rf <- trainControl(method='repeatedcv',
number=10,
repeats=3,
search = 'random',
classProbs=TRUE,
summaryFunction = multiClassSummary,
allowParallel = TRUE)
model.weights <- ifelse(ldata.rf$connecting == "breakLink",
(1/table(ldata.rf$connecting)[1]) * 0.5,
(1/table(ldata.rf$connecting)[2]) * 0.5)
rf <- caret::train(connecting ~ alterWealth + visibleWealth + egoWealth + ego_behavior + alter_behavior + round,
data = ldata.rf,
method = 'rf',
metric = 'AUC',
tuneLength = 30,
verbose=TRUE,
weights=model.weights,
trControl = control.rf)
print(rf)
## Random Forest
##
## 8450 samples
## 6 predictor
## 2 classes: 'breakLink', 'notBreakLink'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 3 times)
## Summary of sample sizes: 7605, 7604, 7605, 7604, 7605, 7605, ...
## Resampling results across tuning parameters:
##
## mtry logLoss AUC prAUC Accuracy Kappa F1 Sensitivity Specificity Pos_Pred_Value Neg_Pred_Value Precision Recall Detection_Rate
## 1 0.9737396 0.8805074 0.5112470 0.7836688 0.0000000 NaN 0.0000000 1.0000000 NaN 0.7836688 NaN 0.0000000 0.0000000
## 2 0.6289841 0.8880328 0.6145596 0.8563706 0.5193651 0.6028298 0.5054545 0.9532355 0.7497873 0.8748354 0.7497873 0.5054545 0.1093495
## 3 0.5235274 0.8902504 0.6629115 0.8589754 0.5496430 0.6354366 0.5694770 0.9388896 0.7212131 0.8877577 0.7212131 0.5694770 0.1231966
## 5 0.5082313 0.8874332 0.6782981 0.8556220 0.5579350 0.6483526 0.6161593 0.9217243 0.6856848 0.8969963 0.6856848 0.6161593 0.1332952
## 6 0.5138224 0.8844749 0.6705949 0.8521905 0.5521839 0.6452618 0.6223593 0.9156336 0.6716022 0.8979011 0.6716022 0.6223593 0.1346368
## 7 0.5420696 0.8813218 0.6660557 0.8467462 0.5384643 0.6352290 0.6179797 0.9098947 0.6554560 0.8962817 0.6554560 0.6179797 0.1336896
## 8 0.5307923 0.8792238 0.6605041 0.8445764 0.5325949 0.6308217 0.6145149 0.9080827 0.6496237 0.8952135 0.6496237 0.6145149 0.1329399
## 10 0.5467505 0.8754124 0.6497351 0.8394096 0.5185578 0.6201892 0.6066795 0.9036528 0.6359724 0.8928420 0.6359724 0.6066795 0.1312444
## 11 0.5674095 0.8732148 0.6440282 0.8377521 0.5133347 0.6159973 0.6021137 0.9027973 0.6321312 0.8916359 0.6321312 0.6021137 0.1302577
## 12 0.5666535 0.8719989 0.6406575 0.8367664 0.5095281 0.6127452 0.5975550 0.9027977 0.6301633 0.8905235 0.6301633 0.5975550 0.1292718
## 13 0.5674477 0.8713465 0.6363574 0.8352278 0.5044546 0.6085925 0.5926350 0.9021936 0.6269483 0.8892590 0.6269483 0.5926350 0.1282066
## 14 0.5820957 0.8697502 0.6327839 0.8342807 0.5027548 0.6076002 0.5937269 0.9006828 0.6236805 0.8893737 0.6236805 0.5937269 0.1284433
## Balanced_Accuracy
## 0.5000000
## 0.7293450
## 0.7541833
## 0.7689418
## 0.7689965
## 0.7639372
## 0.7612988
## 0.7551661
## 0.7524555
## 0.7501764
## 0.7474143
## 0.7472048
##
## AUC was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 3.
varImpPlot(rf$finalModel, type=2)

Partial dependence plot: do ego vs. alter's wealth and wealth visiblity explain isolation?
pred.prob <- function(object, newdata) {
pred <- predict(object, newdata, type="prob")
prob.breakLink <- pred[, "breakLink"]
mean(prob.breakLink)
}
pdp = partial(rf, pred.var = c("alterWealth","visibleWealth","alter_behavior","egoWealth"), pred.fun = pred.prob, plot = FALSE)
pdp.df = data.frame(
alterWealth = pdp$alterWealth,
alter_behavior = pdp$alter_behavior,
visibleWealth = pdp$visibleWealth,
egoWealth = pdp$egoWealth,
breakLink = pdp$yhat
)
g.visible = ggplot(data = pdp.df[pdp.df$visibleWealth=="Visible" & pdp.df$alter_behavior=="D",],
aes(x = alterWealth, y = egoWealth, fill = breakLink)) +
geom_tile() +
ggtitle("Proportions of links broken by ego, \nby ego and alter's wealth, \nwhen wealth is visible, \ngiven that alter defected") +
scale_x_continuous("Wealth of alter") +
scale_y_continuous("Wealth of ego") +
scale_fill_continuous(type = "viridis", limits=c(0.3,0.8)) +
labs(fill="Proportion of links \nbroken by ego")
g.invisible = ggplot(data = pdp.df[pdp.df$visibleWealth=="Not visible" & pdp.df$alter_behavior=="D",],
aes(x = alterWealth, y = egoWealth, fill = breakLink)) +
geom_tile() +
ggtitle("Proportions of links broken by ego, \nby ego and alter's wealth, \nwhen wealth is not visible, \ngiven that alter defected") +
scale_x_continuous("Wealth of alter") +
scale_y_continuous("Wealth of ego") +
scale_fill_continuous(type = "viridis", limits=c(0.3,0.8)) +
labs(fill="Proportion of links \nbroken by ego")
ggarrange(g.visible,g.invisible)
